Package org.terrier.structures.indexing.singlepass

Source Code of org.terrier.structures.indexing.singlepass.BlockPostingInRun$BlockPIRPostingIterator

/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is BlockPostingInRun.java.
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
*   Roi Blanco (rblanc{at}@udc.es)
*   Craig Macdonald (craigm{at}dcs.gla.ac.uk)
*/
package org.terrier.structures.indexing.singlepass;

import java.io.IOException;

import org.terrier.compression.BitOut;
import org.terrier.structures.postings.BlockPosting;
import org.terrier.structures.postings.BlockPostingImpl;
import org.terrier.structures.postings.IterablePosting;
import org.terrier.structures.postings.WritablePosting;
/** Class holding the information for a posting list read
* from a previously written run at disk. Used in the merging phase of the Single pass inversion method.
* This class knows how to append itself to a {@link org.terrier.compression.BitOut} and it
* represents a posting with blocks information <code>(tf, df, [docid, idf, blockFr [blockid]])</code>
* @author Roi Blanco
*
*/
public class BlockPostingInRun extends SimplePostingInRun
{
  /**
   * Constructor for the class.
   */
  public BlockPostingInRun(){
    super();
  }

  /**
   * Writes the document data of this posting to a {@link org.terrier.compression.BitOut}
   * It encodes the data with the right compression methods.
   * The stream is written as <code>d1, idf(d1), blockNo(d1), bid1, bid2, ...,  d2 - d1, idf(d2), blockNo(d2), ...</code> etc
   * @param bos BitOut to be written.
   * @param last int representing the last document written in this posting.
   * @param runShift amount of delta to apply to the first posting read.
   * @return The last posting written.
   */
  public int append(BitOut bos, int last, int runShiftthrows IOException{
    int current = runShift - 1;
    for(int i = 0; i < termDf; i++){
      int docid = postingSource.readGamma() + current;
      bos.writeGamma(docid - last);
      bos.writeUnary(postingSource.readGamma());
      current = last = docid; 
     
      //now deal with blocks
      final int numOfBlocks = postingSource.readUnary() -1;
      bos.writeUnary(numOfBlocks+1);
      if (numOfBlocks > 0)
        for(int j = 0; j < numOfBlocks; j++){
          /* we're reading and saving gaps here, not blockids */
          bos.writeGamma(postingSource.readGamma());
        }
    }
    try{
      postingSource.align();
    }catch(Exception e){
      // last posting
    }
    return last;
  }
 
  protected class BlockPIRPostingIterator extends PIRPostingIterator implements BlockPosting
  {
    int blockFreq;
    int[] blockIds;
    public BlockPIRPostingIterator(int runShift) {
      super(runShift);
    }

    @Override
    protected void readPostingNotDocid() throws IOException {
      super.readPostingNotDocid();
      blockIds = new int[postingSource.readUnary() -1];
      blockIds[0] = postingSource.readGamma()-1;
      for(int i=1;i<blockFreq;i++)
        blockIds[i] = postingSource.readGamma() - blockIds[i-1];
    }

    public int[] getPositions() {
      return blockIds;
    }
   
    public WritablePosting asWritablePosting() {
      WritablePosting bp = new BlockPostingImpl(docid, frequency, blockIds);
      return bp;
    }
  }

  @Override
  public IterablePosting getPostingIterator(int runShift) throws IOException {
    return new BlockPIRPostingIterator(runShift);
  }
}
TOP

Related Classes of org.terrier.structures.indexing.singlepass.BlockPostingInRun$BlockPIRPostingIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.